/*******************************************************************************

  Paying Outsourced Labor: Direct Evidence from Linked Temp Agency-Worker-Client Data

  By Andres Drenik, Simon Jäger, Pascuel Plotkin and Benjamin Schoefer
	January 7th, 2021

	DESCRIPTION: Filters the main dataset for the AKM analysis: drops public-sector workers, generates real wages,
	applies the minimum-wage threshold, and keeps the highest-paying job in a month for each worker.

*******************************************************************************/



/********************************************************************************
***** Preliminaries
********************************************************************************/
* Close any log left open by an earlier run; capture suppresses the error when none is open
capture log close
* Turn off output paging so the script runs without pausing
set more off
* Stamp the log file name with the current date as reported by Stata
local curr_date = c(current_date)
log using "${logs}/05_Filter_Full_Data`curr_date'", replace


/****************************************************************************************
* Append 2010-2017 to 2009 Dataset
****************************************************************************************/

* Load the 2009 file as the base, then stack each year from 2010 through 2017 beneath it
use "${Data_with_filter}/Argentina_Clean_2009.dta", clear
forvalues y = 2010(1)2017 {
	* force lets the append go through when a variable is string in one file and
	* numeric in the other; the mismatched values from the appended file become missing
	append using "${Data_with_filter}/Argentina_Clean_`y'.dta", force
}

/****************************************************************************************
* Drop observations that are not used in our analysis
	* Drop duplicated observations for temp workers (modalidad = 102)
	* Only keep private sector workers
	* Only keep workers that have 1 user firm per month
	* Drop observations with base_registro_match = 2
****************************************************************************************/

	* Sample restrictions, applied one at a time so the log reports a count for each step

	* Duplicated temp-worker records (modalidad 102)
	drop if modalidad == 102
	* Private sector only: remove observations flagged as public workers
	keep if public_worker != 1
	* Remove observations flagged as having multiple user firms
	keep if multiple_user_firm != 1
	* NOTE(review): the meaning of base_registro_match == 2 is not documented in this file —
	* confirm against the script that creates the variable
	drop if base_registro_match == 2


/****************************************************************************************
* Drop unused variables, rename the user-firm identifier, and build the 2-digit industry code
****************************************************************************************/

	* Remove variables that are no longer needed. Each one is dropped in its own
	* capture call, so a variable that is absent does not stop the others from being dropped.
	foreach unused in base_registro_match multiple_user_firm cuit_user_firm2 cuit_user_firm3 cuit_user_firm4 cuit_user_firm5 {
		capture drop `unused'
	}

	* The one remaining user-firm identifier loses its numeric suffix
	rename cuit_user_firm1 cuit_user_firm

	* 2-digit industry code: ciiu_4 divided by 100 and truncated to an integer
	generate industry_code_2digit = int(ciiu_4/100)

/****************************************************************************************
* Merge Price level Time Series to create variable for Real Wage
****************************************************************************************/

	* Attach the price index by date; only observations found in both datasets are kept
	merge m:1 date using "${input}/ArgentinaPriceLevel.dta", keepusing(date index) keep(matched) nogenerate

	* Real wage in levels: total remuneration divided by the price index
	generate real_wage = remuner_total/index

	* Winsorize the real wage in place at the 1st and 99th percentiles, separately by year
	winsor2 real_wage, cuts(1 99) by(year) replace

	* Log real wage, computed from the winsorized level
	generate log_real_wage = ln(real_wage)

/****************************************************************************************
* Merge Minimum Wage Time Series to drop extreme observations
****************************************************************************************/

	* Attach the wage threshold by date; only observations found in both datasets are kept
	* NOTE(review): the global is spelled intermediate_data_clea here — confirm it is defined under exactly that name
	merge m:1 date using "${intermediate_data_clea}/WageThreshold.dta", keepusing(mw_threshold) keep(matched) nogenerate

	* Remove observations whose real wage is below half of the threshold
	* (per the original note, the threshold is the average minimum wage in 2008 — verify in WageThreshold.dta)
	drop if real_wage < 0.5*mw_threshold
	* The threshold is only needed for the filter above
	drop mw_threshold

/****************************************************************************************
* Clean Dataset
	* Drop any other job the Temp worker might have had during the time they were temp workers
	* Keep only the highest paying job in a month
	* Drop first and last observation of each spell
****************************************************************************************/

	* Keep only the highest paying job (for regular workers)
	* missing() is used instead of "== ." so that extended missing values (.a-.z) are
	* dropped too: they sort above every number and would otherwise end up as the
	* last (i.e. "highest paying") row in the sorts below.
	drop if missing(remuner_total)
	* Within each worker-month, sorted by pay, drop every non-temp row except the last one.
	* Temp rows are never dropped at this step.
	* NOTE: rows tied on remuner_total are ordered arbitrarily by the sort, so which of
	* the tied rows survives is not pinned down by this code.
	bys cuil_trab date (remuner_total): drop if _n != _N & temp_worker != 1

	* Identify months in which a worker was a temp worker:
	* sum temp_worker over the worker-month, then recode to a 0/1 indicator
	gegen temp_work_period = sum(temp_worker), by(cuil_trab date)
	replace temp_work_period = (temp_work_period >= 1)

	* Drop any other job the temp worker had during the period he/she was a temp worker
	drop if temp_work_period == 1 & temp_worker == 0

	* Keep the highest paying temp job within each worker-month
	bys cuil_trab date temp_worker (remuner_total): drop if _n != _N & temp_worker == 1

	* Drop the first and last observation of each spell.
	* A "spell" here is the full set of observations for a worker (cuil_trab) at a
	* real_firm, ordered by date; months need not be consecutive. Worker-firm pairs
	* with only one or two observations are removed entirely by these two steps.
	bys cuil_trab real_firm (date): drop if _n == 1
	bys cuil_trab real_firm (date): drop if _n == _N

	* Write the filtered dataset, overwriting any existing file
	save "${intermediate_data_emp}/full_Dataset_Clean.dta", replace

log close
